#include <image3d.h>
#include <imgfiles.h>
#include <morphology.h>
#include <stdio.h>
#include <cuda_runtime.h>
#include <iostream>
#include <ctime>
#include <cstring>
#include "gcuda.h"
#include "main.h"

using namespace std;
using namespace i3d;

// Prints every element of `values` divided by `denom` to stdout as a single
// comma-separated line terminated by a newline.
// The caller guarantees that `values` is not empty.
static void PrintNormalizedRow(const vector<double> &values, double denom)
{
  const size_t last = values.size() - 1;
  for (size_t i = 0; i < last; i++) cout << values[i]/denom << ",";
  cout << values[last]/denom << endl;
}

// CPU reference implementation of the granulometric descriptor.
//
// For every k in 1..numOfOpenings the function calls OpeningO(orig, img, k)
// and, over the z = 0 slice, accumulates
//   * the sum of (orig - opened) voxel differences, and
//   * the number of voxels where that difference is positive.
// It then prints a header line followed by five comma-separated rows to stdout:
//   1. difference sums normalized by the last difference sum,
//   2. difference sums normalized by the intensity sum of the original,
//   3. pixel counts normalized by the last pixel count,
//   4. pixel counts normalized by the number of non-zero pixels in the original,
//   5. first differences of row 1 (at most five values).
// Progress is reported on stderr.
//
// Parameters:
//   orig          - input image; only the z = 0 slice is read.
//   filename      - name of the input file; currently unused (it is referenced
//                   only by the commented-out "#objectKey" header lines).
//   numOfOpenings - number of openings to evaluate; must be at least 1,
//                   otherwise an error is reported on stderr and nothing
//                   is computed.
template <class T> void ComputeGranulo2(const Image3d<T> &orig, string filename, int numOfOpenings)
{
  // Without at least one opening there is no "last value" to normalize by
  // and the result vectors would be indexed out of range.
  if (numOfOpenings < 1)
  {
    cerr << "ComputeGranulo2: the number of openings must be at least 1." << endl;
    return;
  }

  cerr << "Started CPU implementation";

  // v1[k-1]: sum of intensity differences after the k-th opening
  // v2[k-1]: number of pixels changed by the k-th opening
  vector<double> v1(numOfOpenings), v2(numOfOpenings);

  // image dimensions
  const int Nx = orig.GetSizeX();
  const int Ny = orig.GetSizeY();

  // Statistics of the original image. They do not depend on the opening,
  // so they are computed once up front. 64-bit accumulators are used because
  // the sums can exceed the range of int for large or 16-bit images.
  long long orig_intensity_sum = 0, nonzero_pixels = 0;
  for (int j = 0; j < Ny; j++) {
    for (int i = 0; i < Nx; i++) {
      orig_intensity_sum += orig.GetVoxel(i,j,0);
      if (orig.GetVoxel(i,j,0) > 0) nonzero_pixels++;
    }
  }

  // working copy that receives the result of each opening
  Image3d<T> img = orig;
  for (int k = 1; k < numOfOpenings+1; k++)
  {
    OpeningO(orig, img, k);  // compute the opening

    // num_diff_pixels counts the changed pixels, sum_intensity sums the differences
    long long sum_intensity = 0, num_diff_pixels = 0;
    for (int j = 0; j < Ny; j++) {
      for (int i = 0; i < Nx; i++) {
        if ((orig.GetVoxel(i,j,0) - img.GetVoxel(i,j,0)) > 0) num_diff_pixels++;
        sum_intensity += (orig.GetVoxel(i,j,0) - img.GetVoxel(i,j,0));
      }
    }

    // store the computed sums
    v1[k-1] = static_cast<double>(sum_intensity);
    v2[k-1] = static_cast<double>(num_diff_pixels);

    cerr << "."; // report progress
  }
  cerr << endl;

  cout << endl << "CPU Implementation Output:" << endl;
  // printf("#objectKey messif.objects.keys.AbstractObjectKey %s\n", (filename.compare(TEMP_FILE_NAME) == 0) ? "stdin" : filename.c_str() );
  // printf("#objectKey messif.objects.keys.AbstractObjectKey %s\n", filename.c_str());
  printf("Granulo2_intEnd;messif.objects.impl.GranulometryThresholdL1;Granulo2_intSimple;messif.objects.impl.GranulometryThresholdL1;Granulo2_pixEnd;messif.objects.impl.ObjectFloatVectorL1;Granulo2_pixSimple;messif.objects.impl.ObjectFloatVectorL1;Granulo2_intEnd_deriv;messif.objects.impl.ObjectFloatVectorL1;\n");

  const double v1Last = v1[numOfOpenings-1];
  const double v2Last = v2[numOfOpenings-1];

  // intensity sums normalized by the last value
  PrintNormalizedRow(v1, v1Last);

  // intensity sums normalized by the intensity sum of the original
  PrintNormalizedRow(v1, static_cast<double>(orig_intensity_sum));

  // pixel counts normalized by the last value
  PrintNormalizedRow(v2, v2Last);

  // pixel counts normalized by the number of non-zero pixels
  PrintNormalizedRow(v2, static_cast<double>(nonzero_pixels));

  // First differences of the intensity sums normalized by the last value.
  // Five values are printed when enough openings are available; with fewer
  // than six openings only the differences that exist are printed (an empty
  // line for a single opening) instead of reading past the end of v1.
  const int numDerivs = (numOfOpenings - 1 < 5) ? numOfOpenings - 1 : 5;
  for (int i = 1; i <= numDerivs; i++)
  {
    cout << v1[i]/v1Last - v1[i-1]/v1Last;
    if (i < numDerivs) cout << ",";
  }
  cout << endl;
}

// Convenience wrapper around OpeningO that returns its output by value.
//
// A private copy of `img` is handed to OpeningO as the input image and a
// default-constructed image as the output; `N` is forwarded unchanged as the
// third argument (presumably the size of the structuring element - confirm
// against the OpeningO declaration in morphology.h).
template <class T> Image3d<T> Open(Image3d<T> &img, unsigned int N)
{
  Image3d<T> opened;             // filled in by OpeningO
  const Image3d<T> input = img;  // OpeningO reads from this copy, not from img
  OpeningO(input, opened, N);
  return opened;
}

// Convenience wrapper around ErosionO that returns its output by value.
//
// A private copy of `img` is handed to ErosionO as the input image and a
// default-constructed image as the output; `N` is forwarded unchanged as the
// third argument (presumably the size of the structuring element - confirm
// against the ErosionO declaration in morphology.h).
template <class T> Image3d<T> Erode(Image3d<T> &img, unsigned int N)
{
  Image3d<T> eroded;             // filled in by ErosionO
  const Image3d<T> input = img;  // ErosionO reads from this copy, not from img
  ErosionO(input, eroded, N);
  return eroded;
}

// Writes the command-line help to stderr.
//
// `str` is the program name (normally argv[0]); it is echoed in the banner,
// in the synopsis line and in both examples.
void usage(const char *str)
{
  // Option reference, one entry per output line. An empty entry produces the
  // blank line that separates the option list from the examples.
  static const char *const optionHelp[] = {
    "   -h[elp]     Show this usage information.",
    "   -c[pu]      if present, indicates that the CPU implementation should run too",
    "               (for comparison).",
    "   -s[upress]  if present, supresses the execution of the GPU implementation",
    "               (implies -c).",
    "   -o[pen] <filename>   only perform an opening instead of granulodescriptor compu-",
    "                        tation. <filename> is the output filename for opening.",
    "                        A .png extension will be postfixed automatically.",
    "   -n[umber] X if present, overrides the default number of openings (25)",
    "               and sets it to the supplied value.",
    "   -w[ait]     waits for the user to press Enter after execution is completed",
    "               before closing the program.",
    ""
  };
  const unsigned optionHelpLines = sizeof(optionHelp) / sizeof(optionHelp[0]);

  // banner and synopsis
  std::cerr << str
            << " - GPU accelerated granulodescriptor. \n"
            << "The goal of this program was to create a GPU implementation of morphologic\n"
            << "granulodescriptor and compare it with an existing CPU implementation.\n\n"
            << "Usage:" << std::endl << std::endl
            << "$> " << str << " [-h|-c|-s|-o <filename>|-n X|-w] <input file>"
            << std::endl << std::endl;

  // option list
  for (unsigned line = 0; line < optionHelpLines; ++line)
    std::cerr << optionHelp[line] << std::endl;

  // examples
  std::cerr << "EXAMPLE 1:" << std::endl
            << "$> " << str << " obrazek.png" << std::endl << std::endl
            << "EXAMPLE 2:" << std::endl
            << "$> " << str << " -c -n 16 obrazek.png" << std::endl
            << std::endl;
}

// Returns true when `arg` is exactly the short or the long spelling of an option.
static bool matchesOption(const char *arg, const char *shortName, const char *longName)
{
  return strcmp(arg, shortName) == 0 || strcmp(arg, longName) == 0;
}

// Waits for the user to press Enter when `wait` is set (the -w option);
// does nothing otherwise.
static void waitForEnterIfRequested(bool wait)
{
  if (wait) { cout << "Press Enter to exit..." << endl; cin.ignore(); }
}

// Program entry point.
//
// Parses the command line (see usage()), reads the voxel type of the input
// image (always the last argument) and, for 8-bit and 16-bit grayscale images,
// runs the CPU and/or GPU implementation of either a single opening (-o) or
// the full granulometric descriptor. Elapsed times are reported on stderr.
//
// Returns 0 on success or after printing help. Exits with -1 when no argument
// is given, when the voxel type is unsupported, or when an exception is caught.
int main (int argc, char *argv[])
{
  if (argc < 2)
  {
    usage(argv[0]);
    exit(-1);
  }

  // parse options
  bool showHelp = false;     // -h: print help
  bool runCpu = false;       // -c: run the CPU implementation
  bool skipGpu = false;      // -s: suppress the GPU implementation (implies -c)
  bool openOnly = false;     // -o: only perform an opening, no granulometry
  char* openOutName = NULL;  // output file name (without suffix) for -o
  int numOpenings = 25;      // -n: number of openings, 25 unless overridden
  bool waitAtExit = false;   // -w: wait for Enter before the program terminates
  char* filename;
  for (int i = 1; i < argc; ++i) {
    char *arg = argv[i];
    if (matchesOption(arg, "-h", "--help"))
    {
      showHelp = true;
    }
    else if (matchesOption(arg, "-c", "--cpu"))
    {
      runCpu = true;
    }
    else if (matchesOption(arg, "-s", "--supress"))
    {
      runCpu = true;
      skipGpu = true;
    }
    else if (matchesOption(arg, "-n", "--number"))
    {
      // The value must be followed by at least one more argument (the input file).
      if (i + 2 < argc) {
        const int requested = atoi(argv[i+1]);
        if (requested > 0) {
          numOpenings = requested;
          ++i;  // the value has been consumed; do not re-scan it as an option
        } else {
          // Zero, negative and non-numeric values would make the descriptor
          // code index its result vectors out of range.
          cerr << "Ignoring invalid number of openings '" << argv[i+1]
               << "'; using " << numOpenings << "." << endl;
        }
      } else {
        cerr << "You need to provide the number of openings." << endl;
      }
    }
    else if (matchesOption(arg, "-o", "--open"))
    {
      if (i + 2 < argc) {
        openOutName = argv[i+1];
        openOnly = true;
        ++i;  // the file name has been consumed; do not re-scan it as an option
      } else {
        cerr << "You need to provide an output file name for opening." << endl;
      }
    }
    else if (matchesOption(arg, "-w", "--wait"))
    {
      waitAtExit = true;
    }
  }

  // output names for the CPU ("_c") and GPU ("_g") results of -o
  string openOutNameCpu("");
  string openOutNameGpu("");
  if (openOnly) {
    openOutNameCpu.append(openOutName).append("_c.png");
    openOutNameGpu.append(openOutName).append("_g.png");
  }

  if (showHelp) {
    usage(argv[0]);
    waitForEnterIfRequested(waitAtExit);
    exit (0);
  }

  // the input image is always the last argument
  filename = argv[argc - 1];

  {
    // Fall back to the CPU implementation when no usable CUDA device exists.
    bool forcedSkipGpu = !checkCudaSupport();
    if (forcedSkipGpu) {
      cerr << "Sorry, you don't seem to have any supported CUDA-capable devices..." << endl;
      cerr << "We'll need to run this computation on your CPU." << endl;
      runCpu = true;
    }
    skipGpu = skipGpu || forcedSkipGpu;
  }

  try
  {
    cerr << "Checking voxel type" << endl;

    ImgVoxelType vt = ReadImageType (filename);
    //cerr << "Voxel=" << VoxelTypeToString (vt) << endl;

    // NOTE(review): clock() reports processor time used by the program, not
    // wall-clock time; if the gcuda calls block without spinning, the GPU
    // figure may under-report - confirm before relying on the comparison.
    clock_t begin, end;
    double elapsed_c = 0;
    double elapsed_g = 0;

    switch (vt)
    {
    case Gray8Voxel:
      {
        Image3d<GRAY8> img(filename);

        if (runCpu) {
          begin = clock();
          if (openOnly) {
            Image3d<GRAY8> rslt = Open(img, numOpenings);
            rslt.SaveImage(openOutNameCpu.c_str());
          }
          else {
            ComputeGranulo2(img, filename, numOpenings);
          }
          end = clock();
          elapsed_c = double(end - begin) / CLOCKS_PER_SEC;
        }
        if (!skipGpu) {
          begin = clock();
          if (openOnly) {
            Image3d<GRAY8> rslt = gcuda::OpenGpuGray8(img, numOpenings);
            rslt.SaveImage(openOutNameGpu.c_str());
          }
          else {
            gcuda::GranuloGpuGray8(img, numOpenings);
          }
          end = clock();
          elapsed_g = double(end - begin) / CLOCKS_PER_SEC;
        }
        break;
      }
    case Gray16Voxel:
      {
        Image3d<GRAY16> img(filename);

        if (runCpu) {
          begin = clock();
          if (openOnly) {
            Image3d<GRAY16> rslt = Open(img, numOpenings);
            rslt.SaveImage(openOutNameCpu.c_str());
          }
          else {
            ComputeGranulo2(img, filename, numOpenings);
          }
          end = clock();
          elapsed_c = double(end - begin) / CLOCKS_PER_SEC;
        }
        if (!skipGpu) {
          begin = clock();
          if (openOnly) {
            Image3d<GRAY16> rslt = gcuda::OpenGpuGray16(img, numOpenings);
            rslt.SaveImage(openOutNameGpu.c_str());
          }
          else {
            gcuda::GranuloGpuGray16(img, numOpenings);
          }
          end = clock();
          elapsed_g = double(end - begin) / CLOCKS_PER_SEC;
        }
        break;
      }
    default:
      {
        cerr << argv[0] << ": Unsupported voxel type."<< endl;
        cerr << "Sorry, currently only 8-bit and 16-bit grayscale images are supported." << endl;
        waitForEnterIfRequested(waitAtExit);
        exit(-1);
      }
    }
    cerr << endl << "     Openings: "<< numOpenings << endl;
    if (runCpu) {
      cerr       << "  CPU elapsed: " << elapsed_c << " s" << endl;
    }
    if (!skipGpu) {
      cerr       << "  GPU elapsed: " << elapsed_g << " s" << endl;
    }
    cerr << endl;
  }
  catch (IOException & e)
  {
    cerr << e << endl;
    waitForEnterIfRequested(waitAtExit);
    exit (-1);
  }
  catch (InternalException & e)
  {
    cerr << e << endl;
    waitForEnterIfRequested(waitAtExit);
    exit (-1);
  }
  catch (bad_alloc &)
  {
    cerr << "Not enough memory." << endl;
    waitForEnterIfRequested(waitAtExit);
    exit (-1);
  }
  catch (...)
  {
    cerr << "Unknown exception." << endl;
    waitForEnterIfRequested(waitAtExit);
    exit (-1);
  }

  waitForEnterIfRequested(waitAtExit);

  return 0;
}

int checkCudaSupport() {
  int deviceCount, device;
  int gpuDeviceCount = 0;
  struct cudaDeviceProp properties;
  cudaError_t cudaResultCode = cudaGetDeviceCount(&deviceCount);
  if (cudaResultCode != cudaSuccess)
    deviceCount = 0;
  /* machines with no GPUs can still report one emulation device */
  for (device = 0; device < deviceCount; ++device) {
    cudaGetDeviceProperties(&properties, device);
    if (properties.major != 9999) /* 9999 means emulation only */
      ++gpuDeviceCount;
  }
  printf("%d GPU CUDA device(s) found\n", gpuDeviceCount);

  /* don't just return the number of gpus, because other runtime cuda
  errors can also yield non-zero return values */
  if (gpuDeviceCount > 0)
    return true; /* success */
  else
    return false; /* failure */
}

// Stub that does nothing and reports success.
// NOTE(review): presumably present only to satisfy a linker reference to
// MAIN__ from a Fortran runtime pulled in by a dependency - confirm.
int MAIN__()
{
  return 0;
}
